package com.lucene.test3;

import java.io.File;
import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.CachingWrapperFilter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryWrapperFilter;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

import com.lucene.utils.BbsBean;
import com.lucene.utils.Lucene;
import com.lucene.utils.LuceneType;
import com.lucene.utils.Page;

public class LuceneUtils {

    /**
     * Index directory.
     */
    private static String indexDir = "D:\\test\\gm\\index3\\";

    private String getIndexDir() {
        return indexDir;
    }

    /**
     * Analyzer.
     */
    private Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_36);
    // private Analyzer analyzer = new SimpleAnalyzer(Version.LUCENE_36);

    /**
     * Strips leading punctuation from the search text.
     *
     * @param content search text
     * @return the search text starting at its first word or whitespace character
     */
    public String tranStr(String content) {
        char[] ch = content.toCharArray();
        for (int i = 0; i < ch.length; i++) {
            // replaceAll is required here: "[^\\w\\s]" is a regex, and String.replace would treat it literally
            String str = (ch[i] + "").replaceAll("[^\\w\\s]", "");
            if (str.length() > 0) {
                return content.substring(i).trim();
            }
        }
        return "";
    }

    /**
     * Applies highlighting to the hits and returns the result list.
     *
     * @param indexSearch searcher
     * @param query       parsed query
     * @param topDocs     search results
     * @param reValues    fields to return
     * @param fields      fields that should be highlighted
     * @return one map per hit, keyed by field name
     * @throws CorruptIndexException
     * @throws IOException
     * @throws InvalidTokenOffsetsException
     */
    public List<Map<String, String>> searchHLResult(IndexSearcher indexSearch, Query query,
            TopDocs topDocs, String[] reValues, String[] fields)
            throws CorruptIndexException, IOException, InvalidTokenOffsetsException {
        List<Map<String, String>> list = new ArrayList<Map<String, String>>();
        // highlight formatter: wraps matched terms with the configured prefix/suffix tags
        SimpleHTMLFormatter shf = new SimpleHTMLFormatter(LuceneType.HIGH_LIGHT_PRE, LuceneType.HIGH_LIGHT_SUB);
        // highlighter driven by the query's scorer
        Highlighter hl = new Highlighter(shf, new QueryScorer(query));
        if (topDocs != null) {
            for (int i = 0; i < topDocs.scoreDocs.length; i++) {
                Document doc = indexSearch.doc(topDocs.scoreDocs[i].doc);
                Map<String, String> map = new HashMap<String, String>();
                for (String reValue : reValues) { // collect every requested return field
                    for (String field : fields) {
                        if (reValue.equals(field)) { // this field should be highlighted
                            // re-tokenize the stored value and extract the best matching fragment
                            TokenStream tokenStream = analyzer.tokenStream(reValue,
                                    new StringReader(doc.get(reValue)));
                            String str = hl.getBestFragment(tokenStream, doc.get(reValue));
                            // fall back to the stored value when no highlighted fragment is produced
                            map.put(reValue, str == null ? doc.get(reValue) : str);
                            break; // highlighting done, leave the inner loop
                        } else { // no highlighting for this field
                            map.put(reValue, doc.get(reValue));
                        }
                    }
                }
                list.add(map);
            }
        }
        return list;
    }
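    /*
     * Illustrative sketch: highlights a single stored field value the same way searchHLResult
     * does, using the analyzer and the LuceneType.HIGH_LIGHT_PRE / HIGH_LIGHT_SUB tags defined
     * above. The method and parameter names are placeholders, not part of the original class.
     */
    public String highlightValue(Query query, String fieldName, String storedValue)
            throws IOException, InvalidTokenOffsetsException {
        Highlighter highlighter = new Highlighter(
                new SimpleHTMLFormatter(LuceneType.HIGH_LIGHT_PRE, LuceneType.HIGH_LIGHT_SUB),
                new QueryScorer(query));
        TokenStream tokenStream = analyzer.tokenStream(fieldName, new StringReader(storedValue));
        String fragment = highlighter.getBestFragment(tokenStream, storedValue);
        return fragment == null ? storedValue : fragment; // return the raw value when nothing matches
    }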
    /**
     * Plain full-text search; the query fields and the returned fields are the same.<br/>
     *
     * @param queryStr query strings; queryStr[0] is the main keyword, and queryStr[1], if present,
     *                 drives a second, filtered pass<br/>
     * @param field    fields used both for querying and for the returned values<br/>
     * @param start    current page
     * @param pageSize page size
     * @return a page of result maps
     * @throws Exception
     */
    public Page<Map<String, String>> search(String[] queryStr, String[] field, int start, int pageSize)
            throws Exception {
        Page<Map<String, String>> pager = new Page<Map<String, String>>(start, pageSize, 0,
                new ArrayList<Map<String, String>>());
        IndexReader reader = IndexReader.open(FSDirectory.open(new File(getIndexDir())));
        IndexSearcher indexSearch = new IndexSearcher(reader);
        /*
         * Search the title and content fields at the same time and give each of them a boost for
         * ranking; the higher the boost, the higher the document ranks. Identical boosts are used
         * here to keep the scores easy to reason about.
         */
        Map<String, Float> boosts = new HashMap<String, Float>();
        boosts.put(LuceneType.ALL_TITLE, 1.0f);
        boosts.put(LuceneType.ALL_CONTENT, 1.0f);
        // MultiFieldQueryParser searches the same keyword across several fields;
        // the boosts map is passed so that the weights above actually take effect
        QueryParser queryParser = new MultiFieldQueryParser(Version.LUCENE_36, field, analyzer, boosts);
        Query query = queryParser.parse(queryStr[0]);
        CachingWrapperFilter filter = null; // cached filter used by the optional second pass
        if (queryStr.length > 1) { // is a second, filtered search required?
            filter = filterIndex(field, queryStr[1]);
        }
        Lucene entity = new Lucene();
        if (start <= 0) {
            start = 1;
        }
        TopDocs result = indexSearch.search(query, filter, pageSize * start);
        int index = (start - 1) * pageSize;
        if (index > result.totalHits) { // requested page is past the last hit: step back one page
            index -= pageSize;
            start--;
        }
        ScoreDoc scoreDoc = null;
        if (index > 0) {
            scoreDoc = result.scoreDocs[index - 1]; // cursor: last hit of the previous page
        }
        TopDocs topDocs = indexSearch.searchAfter(scoreDoc, query, filter, pageSize);
        entity.setTopDocs(topDocs);
        entity.setRowCount(topDocs.totalHits); // total hit count
        entity.setPageSize(pageSize);
        entity.setCurrent(start);
        List<Map<String, String>> list = searchHLResult(indexSearch, query, entity.getTopDocs(), field, field);
        pager = new Page<Map<String, String>>(entity.getCurrent(), pageSize, entity.getRowCount(), list);
        indexSearch.close();
        reader.close();
        return pager;
    }

    /**
     * Builds the cached filter used by the second pass of a two-step search.
     *
     * @param fields  index fields; only the first one is used for the filter
     * @param content filter condition
     * @return cached filter
     * @throws ParseException
     */
    public CachingWrapperFilter filterIndex(String[] fields, String content) throws ParseException {
        String[] field = new String[1]; // field the cached filter is built on
        field[0] = fields[0];
        QueryParser queryParser = new MultiFieldQueryParser(Version.LUCENE_36, field, analyzer);
        Query query = queryParser.parse(content); // build the filter query
        QueryWrapperFilter oldFilter = new QueryWrapperFilter(query); // wrap the query as a filter
        CachingWrapperFilter filter = new CachingWrapperFilter(oldFilter); // cache the filter results
        return filter;
    }
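    /*
     * Usage sketch: runs a paged, filtered search. The keyword values below are placeholders;
     * the field constants are the same LuceneType constants used elsewhere in this class, and
     * Page is the project's own paging class, so its accessors are not shown here.
     */
    public static void exampleSearch() throws Exception {
        LuceneUtils utils = new LuceneUtils();
        String[] fields = { LuceneType.ALL_TITLE, LuceneType.ALL_CONTENT };
        // queryStr[0] is the main keyword; queryStr[1] builds the cached filter on fields[0]
        String[] queryStr = { "lucene", "forum" };
        Page<Map<String, String>> page = utils.search(queryStr, fields, 1, 10);
        System.out.println(page); // first page of at most 10 highlighted hits
    }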
    /**
     * Creates the index for a list of beans; the existing index is cleared first.
     *
     * @param list beans to index
     * @return true on success
     */
    public boolean createrIndex(List<BbsBean> list) {
        Directory directory = null;
        IndexWriter indexWriter = null;
        try {
            directory = FSDirectory.open(new File(indexDir)); // open the index directory
            IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_36, analyzer);
            indexWriter = new IndexWriter(directory, iwc);
            indexWriter.deleteAll();
            for (int i = 0; i < list.size(); i++) {
                Document doc = new Document();
                BbsBean bean = list.get(i);
                doc.add(new Field(LuceneType.ALL_TYPE, LuceneType.TYPE_TOPIC, Store.YES, Index.NOT_ANALYZED));
                doc.add(new Field(LuceneType.ALL_ID, bean.getTopicId(), Store.YES, Index.NOT_ANALYZED));
                doc.add(new Field(LuceneType.ALL_TITLE, bean.getTitle(), Store.YES, Index.ANALYZED));
                doc.add(new Field(LuceneType.ALL_CONTENT,
                        bean.getRevertContent().replaceAll("<\\S[^>]+>", "").replaceAll("<p>", ""),
                        Store.YES, Index.ANALYZED));
                doc.add(new Field(LuceneType.ALL_URL, bean.getRevertUrl(), Store.YES, Index.NOT_ANALYZED));
                indexWriter.addDocument(doc); // add the document to the index
            }
        } catch (Exception e) {
            e.printStackTrace();
            return false;
        } finally {
            try {
                if (indexWriter != null) {
                    indexWriter.close();
                }
            } catch (CorruptIndexException e) {
                e.printStackTrace();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
        return true;
    }

    /**
     * Deletes documents from the index.<br/>
     *
     * @param id    term value identifying the documents to delete; pass null (together with a
     *              null field) to clear the whole index
     * @param field field the delete term is built on
     * @return true on success
     */
    public boolean deleteIndex(String id, String field) {
        IndexWriter writer = null;
        try {
            Directory dir = FSDirectory.open(new File(getIndexDir())); // open the index directory
            writer = new IndexWriter(dir, new IndexWriterConfig(Version.LUCENE_36, analyzer));
            if (id == null && field == null) { // delete everything?
                writer.deleteAll();
            } else {
                writer.deleteDocuments(new Term(field, id));
            }
        } catch (Exception e) {
            return false;
        } finally {
            try {
                if (writer != null) {
                    writer.close();
                }
            } catch (CorruptIndexException e) {
                e.printStackTrace();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
        return true;
    }

    /**
     * Updates a document in the index.
     *
     * @param id    term value identifying the document to update
     * @param field field the update term is built on
     * @param doc   new document content
     * @return true on success
     */
    public boolean updateIndex(String id, String field, Document doc) {
        IndexWriter writer = null;
        Directory dir = null;
        try {
            dir = FSDirectory.open(new File(getIndexDir())); // open the index directory before creating the writer
            writer = new IndexWriter(dir, new IndexWriterConfig(Version.LUCENE_36, analyzer));
            writer.updateDocument(new Term(field, id), doc);
        } catch (Exception e) {
            e.printStackTrace();
            return false;
        } finally {
            try {
                if (writer != null) {
                    writer.close();
                }
            } catch (CorruptIndexException e) {
                e.printStackTrace();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
        return true;
    }
}
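/*
 * Usage sketch: a small driver that deletes one topic by its id and then writes a replacement
 * document through updateIndex. The id and title values are placeholders; the field constants
 * are the same LuceneType constants the class above indexes with.
 */
class LuceneUtilsMaintenanceExample {

    public static void main(String[] args) {
        LuceneUtils utils = new LuceneUtils();
        // remove the topic whose ALL_ID field equals "1001"
        utils.deleteIndex("1001", LuceneType.ALL_ID);
        // build a replacement document and write it back under the same id
        Document doc = new Document();
        doc.add(new Field(LuceneType.ALL_ID, "1001", Store.YES, Index.NOT_ANALYZED));
        doc.add(new Field(LuceneType.ALL_TITLE, "updated title", Store.YES, Index.ANALYZED));
        utils.updateIndex("1001", LuceneType.ALL_ID, doc);
    }
}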